/* Loop invariant motion.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "domwalk.h"
#include "params.h"
#include "tree-pass.h"
#include "flags.h"
#include "real.h"
#include "hashtab.h"

/* TODO:  Support for predicated code motion.  I.e.

   while (1)
     {
       if (cond)
	 {
	   a = inv;
	   something;
	 }
     }

   where COND and INV are invariants, but evaluating INV may trap or be
   invalid for some other reason if !COND.  This may be transformed to

   if (cond)
     a = inv;
   while (1)
     {
       if (cond)
	 something;
     }  */

/* A type for the list of statements that have to be moved in order to be able
   to hoist an invariant computation.  */

struct depend
{
  tree stmt;
  struct depend *next;
};

/* The auxiliary data kept for each statement.  */

struct lim_aux_data
{
  struct loop *max_loop;	/* The outermost loop in which the statement
				   is invariant.  */

  struct loop *tgt_loop;	/* The loop out of which we want to move the
				   invariant.  */

  struct loop *always_executed_in;
				/* The outermost loop for which we are sure
				   the statement is executed if the loop
				   is entered.  */

  bool sm_done;			/* True iff the store motion for a memory
				   reference in the statement has already
				   been executed.  */

  unsigned cost;		/* Cost of the computation performed by the
				   statement.  */

  struct depend *depends;	/* List of statements that must also be hoisted
				   out of the loop when this statement is
				   hoisted; i.e. those that define the operands
				   of the statement and are inside of the
				   MAX_LOOP loop.  */
};

#define LIM_DATA(STMT) (TREE_CODE (STMT) == PHI_NODE \
			? NULL \
			: (struct lim_aux_data *) (stmt_ann (STMT)->aux))

/* Description of a memory reference location for store motion.  */

struct mem_ref_loc
{
  tree *ref;			/* The reference itself.  */
  tree stmt;			/* The statement in which it occurs.  */
  struct mem_ref_loc *next;	/* Next use in the chain.  */
};

/* Description of a memory reference for store motion.  */

struct mem_ref
{
  tree mem;			/* The memory itself.  */
  hashval_t hash;		/* Its hash value.  */
  bool is_stored;		/* True if there is a store to the location
				   in the loop.  */
  struct mem_ref_loc *locs;	/* The locations where it is found.  */
  bitmap vops;			/* Vops corresponding to this memory
				   location.  */
};

/* Minimum cost of an expensive expression.  */
#define LIM_EXPENSIVE ((unsigned) PARAM_VALUE (PARAM_LIM_EXPENSIVE))

/* The outermost loop for which execution of the header guarantees that the
   block will be executed.  */
#define ALWAYS_EXECUTED_IN(BB) ((struct loop *) (BB)->aux)

/* Calls CBCK for each index in memory reference ADDR_P.  There are two
   kinds of situations handled; in each of these cases, the memory reference
   and DATA are passed to the callback:

   Access to an array: ARRAY_{RANGE_}REF (base, index).  In this case we also
   pass the pointer to the index to the callback.

   Pointer dereference: INDIRECT_REF (addr).  In this case we also pass the
   pointer to addr to the callback.

   If the callback returns false, the whole search stops and false is returned.
   Otherwise the function returns true after traversing through the whole
   reference *ADDR_P.  */

bool
for_each_index (tree *addr_p, bool (*cbck) (tree, tree *, void *), void *data)
{
  tree *nxt, *idx;

  for (; ; addr_p = nxt)
    {
      switch (TREE_CODE (*addr_p))
	{
	case SSA_NAME:
	  return cbck (*addr_p, addr_p, data);

	case MISALIGNED_INDIRECT_REF:
	case ALIGN_INDIRECT_REF:
	case INDIRECT_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  return cbck (*addr_p, nxt, data);

	case BIT_FIELD_REF:
	case VIEW_CONVERT_EXPR:
	case ARRAY_RANGE_REF:
	case REALPART_EXPR:
	case IMAGPART_EXPR:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case COMPONENT_REF:
	  /* If the component has varying offset, it behaves like index
	     as well.  */
	  idx = &TREE_OPERAND (*addr_p, 2);
	  if (*idx
	      && !cbck (*addr_p, idx, data))
	    return false;

	  nxt = &TREE_OPERAND (*addr_p, 0);
	  break;

	case ARRAY_REF:
	  nxt = &TREE_OPERAND (*addr_p, 0);
	  if (!cbck (*addr_p, &TREE_OPERAND (*addr_p, 1), data))
	    return false;
	  break;

	case VAR_DECL:
	case PARM_DECL:
	case STRING_CST:
	case RESULT_DECL:
	  return true;

	default:
	  gcc_unreachable ();
	}
    }
}
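
/* For illustration only: for a reference like a.b[i][j] (with A a VAR_DECL
   and B a field of array-of-arrays type), the traversal above starts at the
   outermost ARRAY_REF and proceeds towards the base, so CBCK is invoked
   first on the index J, then on the index I, and the walk stops successfully
   at the VAR_DECL A.  For (*p)[i], CBCK is invoked on I and then on P via
   the INDIRECT_REF case.  The names here are hypothetical.  */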

/* If it is possible to hoist the statement STMT unconditionally,
   returns MOVE_POSSIBLE.
   If it is possible to hoist the statement STMT, but we must avoid executing
   it if it would not be executed in the original program (e.g. because it
   may trap), return MOVE_PRESERVE_EXECUTION.
   Otherwise return MOVE_IMPOSSIBLE.  */

enum move_pos
movement_possibility (tree stmt)
{
  tree lhs, rhs;

  if (flag_unswitch_loops
      && TREE_CODE (stmt) == COND_EXPR)
    {
      /* If we perform unswitching, force the operands of the invariant
	 condition to be moved out of the loop.  */
      return MOVE_POSSIBLE;
    }

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return MOVE_IMPOSSIBLE;

  if (stmt_ends_bb_p (stmt))
    return MOVE_IMPOSSIBLE;

  if (stmt_ann (stmt)->has_volatile_ops)
    return MOVE_IMPOSSIBLE;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
    return MOVE_IMPOSSIBLE;

  rhs = TREE_OPERAND (stmt, 1);

  if (TREE_SIDE_EFFECTS (rhs))
    return MOVE_IMPOSSIBLE;

  if (TREE_CODE (lhs) != SSA_NAME
      || tree_could_trap_p (rhs))
    return MOVE_PRESERVE_EXECUTION;

  if (get_call_expr_in (stmt))
    {
      /* While a pure or const call is guaranteed to have no side effects, we
	 cannot move it arbitrarily.  Consider code like

	 char *s = something ();

	 while (1)
	   {
	     if (s)
	       t = strlen (s);
	     else
	       t = 0;
	   }

	 Here the strlen call cannot be moved out of the loop, even though
	 s is invariant.  In addition to possibly creating a call with
	 invalid arguments, moving out a function call that is not executed
	 may cause performance regressions in case the call is costly and
	 not executed at all.  */
      return MOVE_PRESERVE_EXECUTION;
    }

  return MOVE_POSSIBLE;
}

/* Suppose that operand DEF is used inside the LOOP.  Returns the outermost
   loop to which we could move the expression using DEF if it did not have
   other operands, i.e. the outermost loop enclosing LOOP in which the value
   of DEF is invariant.  */

static struct loop *
outermost_invariant_loop (tree def, struct loop *loop)
{
  tree def_stmt;
  basic_block def_bb;
  struct loop *max_loop;

  if (TREE_CODE (def) != SSA_NAME)
    return superloop_at_depth (loop, 1);

  def_stmt = SSA_NAME_DEF_STMT (def);
  def_bb = bb_for_stmt (def_stmt);
  if (!def_bb)
    return superloop_at_depth (loop, 1);

  max_loop = find_common_loop (loop, def_bb->loop_father);

  if (LIM_DATA (def_stmt) && LIM_DATA (def_stmt)->max_loop)
    max_loop = find_common_loop (max_loop,
				 LIM_DATA (def_stmt)->max_loop->outer);
  if (max_loop == loop)
    return NULL;
  max_loop = superloop_at_depth (loop, max_loop->depth + 1);

  return max_loop;
}

/* Returns the outermost superloop of LOOP in which the expression EXPR is
   invariant.  */

static struct loop *
outermost_invariant_loop_expr (tree expr, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;
  struct loop *max_loop = superloop_at_depth (loop, 1), *aloop;

  if (TREE_CODE (expr) == SSA_NAME
      || TREE_CODE (expr) == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return outermost_invariant_loop (expr, loop);

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return NULL;

  nops = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    {
      aloop = outermost_invariant_loop_expr (TREE_OPERAND (expr, i), loop);
      if (!aloop)
	return NULL;

      if (flow_loop_nested_p (max_loop, aloop))
	max_loop = aloop;
    }

  return max_loop;
}

/* DATA is a structure containing information associated with a statement
   inside LOOP.  DEF is one of the operands of this statement.

   Find the outermost loop enclosing LOOP in which the value of DEF is
   invariant and record this in the DATA->max_loop field.  If DEF itself is
   defined inside this loop as well (i.e. we need to hoist it out of the loop
   if we want to hoist the statement represented by DATA), record the
   statement in which DEF is defined in the DATA->depends list.  Additionally
   if ADD_COST is true, add the cost of the computation of DEF to DATA->cost.

   If DEF is not invariant in LOOP, return false.  Otherwise return true.  */

static bool
add_dependency (tree def, struct lim_aux_data *data, struct loop *loop,
		bool add_cost)
{
  tree def_stmt = SSA_NAME_DEF_STMT (def);
  basic_block def_bb = bb_for_stmt (def_stmt);
  struct loop *max_loop;
  struct depend *dep;

  if (!def_bb)
    return true;

  max_loop = outermost_invariant_loop (def, loop);
  if (!max_loop)
    return false;

  if (flow_loop_nested_p (data->max_loop, max_loop))
    data->max_loop = max_loop;

  if (!LIM_DATA (def_stmt))
    return true;

  if (add_cost
      /* Only add the cost if the statement defining DEF is inside LOOP,
	 i.e. if it is likely that by moving the invariants dependent
	 on it, we will be able to avoid creating a new register for
	 it (since it will be only used in these dependent invariants).  */
      && def_bb->loop_father == loop)
    data->cost += LIM_DATA (def_stmt)->cost;

  dep = xmalloc (sizeof (struct depend));
  dep->stmt = def_stmt;
  dep->next = data->depends;
  data->depends = dep;

  return true;
}

/* Returns an estimate for a cost of statement STMT.  TODO -- the values here
   are just ad-hoc constants.  The estimates should be based on target-specific
   values.  */

static unsigned
stmt_cost (tree stmt)
{
  tree rhs;
  unsigned cost = 1;

  /* Always try to create possibilities for unswitching.  */
  if (TREE_CODE (stmt) == COND_EXPR)
    return LIM_EXPENSIVE;

  rhs = TREE_OPERAND (stmt, 1);

  /* Hoisting memory references out should almost surely be a win.  */
  if (stmt_references_memory_p (stmt))
    cost += 20;

  switch (TREE_CODE (rhs))
    {
    case CALL_EXPR:
      /* We should be hoisting calls if possible.  */

      /* Unless the call is a builtin_constant_p; this always folds to a
	 constant, so moving it is useless.  */
      rhs = get_callee_fndecl (rhs);
      if (DECL_BUILT_IN_CLASS (rhs) == BUILT_IN_NORMAL
	  && DECL_FUNCTION_CODE (rhs) == BUILT_IN_CONSTANT_P)
	return 0;

      cost += 20;
      break;

    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case CEIL_DIV_EXPR:
    case FLOOR_DIV_EXPR:
    case ROUND_DIV_EXPR:
    case EXACT_DIV_EXPR:
    case CEIL_MOD_EXPR:
    case FLOOR_MOD_EXPR:
    case ROUND_MOD_EXPR:
    case TRUNC_MOD_EXPR:
    case RDIV_EXPR:
      /* Division and multiplication are usually expensive.  */
      cost += 20;
      break;

    default:
      break;
    }

  return cost;
}
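
/* As an example of the estimates above: a statement like a = *p * x is
   estimated at 1 (base) + 20 (memory reference) + 20 (multiplication) = 41,
   while a plain copy a = b is estimated at 1.  Only statements whose cost
   reaches LIM_EXPENSIVE are forced to be hoisted as far out as possible
   (see set_profitable_level below).  The names A, P, X and B are purely
   illustrative.  */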

/* Determine the outermost loop to which it is possible to hoist a statement
   STMT and store it to LIM_DATA (STMT)->max_loop.  To do this we determine
   the outermost loop in which the value computed by STMT is invariant.
   If MUST_PRESERVE_EXEC is true, additionally choose the loop so that we
   preserve the fact whether STMT is executed.  The function also fills in
   other related information in LIM_DATA (STMT).

   The function returns false if STMT cannot be hoisted outside of the loop it
   is defined in, and true otherwise.  */

static bool
determine_max_movement (tree stmt, bool must_preserve_exec)
{
  basic_block bb = bb_for_stmt (stmt);
  struct loop *loop = bb->loop_father;
  struct loop *level;
  struct lim_aux_data *lim_data = LIM_DATA (stmt);
  tree val;
  ssa_op_iter iter;

  if (must_preserve_exec)
    level = ALWAYS_EXECUTED_IN (bb);
  else
    level = superloop_at_depth (loop, 1);
  lim_data->max_loop = level;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter, SSA_OP_USE)
    if (!add_dependency (val, lim_data, loop, true))
      return false;

  FOR_EACH_SSA_TREE_OPERAND (val, stmt, iter,
			     SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
    if (!add_dependency (val, lim_data, loop, false))
      return false;

  lim_data->cost += stmt_cost (stmt);

  return true;
}

/* Suppose that some statement in ORIG_LOOP is hoisted to the loop LEVEL,
   and that one of the operands of this statement is computed by STMT.
   Ensure that STMT (together with all the statements that define its
   operands) is hoisted at least out of the loop LEVEL.  */

static void
set_level (tree stmt, struct loop *orig_loop, struct loop *level)
{
  struct loop *stmt_loop = bb_for_stmt (stmt)->loop_father;
  struct depend *dep;

  stmt_loop = find_common_loop (orig_loop, stmt_loop);
  if (LIM_DATA (stmt) && LIM_DATA (stmt)->tgt_loop)
    stmt_loop = find_common_loop (stmt_loop,
				  LIM_DATA (stmt)->tgt_loop->outer);
  if (flow_loop_nested_p (stmt_loop, level))
    return;

  gcc_assert (LIM_DATA (stmt));
  gcc_assert (level == LIM_DATA (stmt)->max_loop
	      || flow_loop_nested_p (LIM_DATA (stmt)->max_loop, level));

  LIM_DATA (stmt)->tgt_loop = level;
  for (dep = LIM_DATA (stmt)->depends; dep; dep = dep->next)
    set_level (dep->stmt, orig_loop, level);
}

/* Determines an outermost loop from which we want to hoist the statement
   STMT.  For now we choose the outermost possible loop.  TODO -- use
   profiling information to set it more sanely.  */

static void
set_profitable_level (tree stmt)
{
  set_level (stmt, bb_for_stmt (stmt)->loop_father, LIM_DATA (stmt)->max_loop);
}

/* Returns true if STMT is not a pure call.  */

static bool
nonpure_call_p (tree stmt)
{
  tree call = get_call_expr_in (stmt);

  if (!call)
    return false;

  return TREE_SIDE_EFFECTS (call) != 0;
}

/* Releases the memory occupied by DATA.  */

static void
free_lim_aux_data (struct lim_aux_data *data)
{
  struct depend *dep, *next;

  for (dep = data->depends; dep; dep = next)
    {
      next = dep->next;
      free (dep);
    }
  free (data);
}

/* Determine the outermost loops in which statements in basic block BB are
   invariant, and record them to the LIM_DATA associated with the statements.
   Callback for walk_dominator_tree.  */

static void
determine_invariantness_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			      basic_block bb)
{
  enum move_pos pos;
  block_stmt_iterator bsi;
  tree stmt, rhs;
  bool maybe_never = ALWAYS_EXECUTED_IN (bb) == NULL;
  struct loop *outermost = ALWAYS_EXECUTED_IN (bb);

  if (!bb->loop_father->outer)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Basic block %d (loop %d -- depth %d):\n\n",
	     bb->index, bb->loop_father->num, bb->loop_father->depth);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);

      pos = movement_possibility (stmt);
      if (pos == MOVE_IMPOSSIBLE)
	{
	  if (nonpure_call_p (stmt))
	    {
	      maybe_never = true;
	      outermost = NULL;
	    }
	  continue;
	}

      /* If the divisor is invariant, convert a/b to a*(1/b), allowing the
	 reciprocal to be hoisted out of the loop, saving an expensive
	 divide.  */
      if (pos == MOVE_POSSIBLE
	  && (rhs = TREE_OPERAND (stmt, 1)) != NULL
	  && TREE_CODE (rhs) == RDIV_EXPR
	  && flag_unsafe_math_optimizations
	  && outermost_invariant_loop_expr (TREE_OPERAND (rhs, 1),
					    loop_containing_stmt (stmt)) != NULL
	  && outermost_invariant_loop_expr (rhs,
					    loop_containing_stmt (stmt)) == NULL)
	{
	  tree lhs, stmt1, stmt2, var, name;

	  lhs = TREE_OPERAND (stmt, 0);

	  /* stmt must be a MODIFY_EXPR.  */
	  var = create_tmp_var (TREE_TYPE (rhs), "reciptmp");
	  add_referenced_tmp_var (var);

	  stmt1 = build2 (MODIFY_EXPR, void_type_node, var,
			  build2 (RDIV_EXPR, TREE_TYPE (rhs),
				  build_real (TREE_TYPE (rhs), dconst1),
				  TREE_OPERAND (rhs, 1)));
	  name = make_ssa_name (var, stmt1);
	  TREE_OPERAND (stmt1, 0) = name;
	  stmt2 = build2 (MODIFY_EXPR, void_type_node, lhs,
			  build2 (MULT_EXPR, TREE_TYPE (rhs),
				  name, TREE_OPERAND (rhs, 0)));

	  /* Replace the division stmt with the reciprocal and multiply stmts.
	     The multiply stmt is not invariant, so update the iterator
	     and avoid rescanning.  */
	  bsi_replace (&bsi, stmt1, true);
	  bsi_insert_after (&bsi, stmt2, BSI_NEW_STMT);
	  SSA_NAME_DEF_STMT (lhs) = stmt2;

	  /* Continue processing with the invariant reciprocal statement.  */
	  stmt = stmt1;
	}

      stmt_ann (stmt)->aux = xcalloc (1, sizeof (struct lim_aux_data));
      LIM_DATA (stmt)->always_executed_in = outermost;

      if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
	continue;

      if (!determine_max_movement (stmt, pos == MOVE_PRESERVE_EXECUTION))
	{
	  LIM_DATA (stmt)->max_loop = NULL;
	  continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  print_generic_stmt_indented (dump_file, stmt, 0, 2);
	  fprintf (dump_file, "  invariant up to level %d, cost %d.\n\n",
		   LIM_DATA (stmt)->max_loop->depth,
		   LIM_DATA (stmt)->cost);
	}

      if (LIM_DATA (stmt)->cost >= LIM_EXPENSIVE)
	set_profitable_level (stmt);
    }
}

/* For each statement determines the outermost loop in which it is invariant,
   the statements on whose motion it depends and the cost of the computation.
   This information is stored to the LIM_DATA structure associated with
   each statement.  */

static void
determine_invariantness (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = determine_invariantness_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);
}

/* Commits edge insertions and updates loop structures.  */

void
loop_commit_inserts (void)
{
  unsigned old_last_basic_block, i;
  basic_block bb;

  old_last_basic_block = last_basic_block;
  bsi_commit_edge_inserts ();
  for (i = old_last_basic_block; i < (unsigned) last_basic_block; i++)
    {
      bb = BASIC_BLOCK (i);
      add_bb_to_loop (bb,
		      find_common_loop (single_pred (bb)->loop_father,
					single_succ (bb)->loop_father));
    }
}

/* Hoist the statements in basic block BB out of the loops prescribed by
   data stored in LIM_DATA structures associated with each statement.  Callback
   for walk_dominator_tree.  */

static void
move_computations_stmt (struct dom_walk_data *dw_data ATTRIBUTE_UNUSED,
			basic_block bb)
{
  struct loop *level;
  block_stmt_iterator bsi;
  tree stmt;
  unsigned cost = 0;

  if (!bb->loop_father->outer)
    return;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); )
    {
      stmt = bsi_stmt (bsi);

      if (!LIM_DATA (stmt))
	{
	  bsi_next (&bsi);
	  continue;
	}

      cost = LIM_DATA (stmt)->cost;
      level = LIM_DATA (stmt)->tgt_loop;
      free_lim_aux_data (LIM_DATA (stmt));
      stmt_ann (stmt)->aux = NULL;

      if (!level)
	{
	  bsi_next (&bsi);
	  continue;
	}

      /* We do not really want to move conditionals out of the loop; we just
	 placed them here to force their operands to be moved if necessary.  */
      if (TREE_CODE (stmt) == COND_EXPR)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Moving statement\n");
	  print_generic_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "(cost %u) out of loop %d.\n\n",
		   cost, level->num);
	}
      bsi_insert_on_edge (loop_preheader_edge (level), stmt);
      bsi_remove (&bsi);
    }
}

/* Hoist the statements out of the loops prescribed by data stored in
   LIM_DATA structures associated with each statement.  */

static void
move_computations (void)
{
  struct dom_walk_data walk_data;

  memset (&walk_data, 0, sizeof (struct dom_walk_data));
  walk_data.before_dom_children_before_stmts = move_computations_stmt;

  init_walk_dominator_tree (&walk_data);
  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
  fini_walk_dominator_tree (&walk_data);

  loop_commit_inserts ();
  if (need_ssa_update_p ())
    rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
}

/* Checks whether the statement defining variable *INDEX can be hoisted
   out of the loop passed in DATA.  Callback for for_each_index.  */

static bool
may_move_till (tree ref, tree *index, void *data)
{
  struct loop *loop = data, *max_loop;

  /* If REF is an array reference, check also that the step and the lower
     bound are invariant in LOOP.  */
  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      max_loop = outermost_invariant_loop_expr (step, loop);
      if (!max_loop)
	return false;

      max_loop = outermost_invariant_loop_expr (lbound, loop);
      if (!max_loop)
	return false;
    }

  max_loop = outermost_invariant_loop (*index, loop);
  if (!max_loop)
    return false;

  return true;
}

/* Forces statements defining (invariant) SSA names in expression EXPR to be
   moved out of the LOOP.  ORIG_LOOP is the loop in which EXPR is used.  */

static void
force_move_till_expr (tree expr, struct loop *orig_loop, struct loop *loop)
{
  enum tree_code_class class = TREE_CODE_CLASS (TREE_CODE (expr));
  unsigned i, nops;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      tree stmt = SSA_NAME_DEF_STMT (expr);
      if (IS_EMPTY_STMT (stmt))
	return;

      set_level (stmt, orig_loop, loop);
      return;
    }

  if (class != tcc_unary
      && class != tcc_binary
      && class != tcc_expression
      && class != tcc_comparison)
    return;

  nops = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < nops; i++)
    force_move_till_expr (TREE_OPERAND (expr, i), orig_loop, loop);
}

/* Forces statements defining invariants in REF (and *INDEX) to be moved out
   of the LOOP.  The reference REF is used in the loop ORIG_LOOP.  Callback
   for for_each_index.  */

struct fmt_data
{
  struct loop *loop;
  struct loop *orig_loop;
};

static bool
force_move_till (tree ref, tree *index, void *data)
{
  tree stmt;
  struct fmt_data *fmt_data = data;

  if (TREE_CODE (ref) == ARRAY_REF)
    {
      tree step = array_ref_element_size (ref);
      tree lbound = array_ref_low_bound (ref);

      force_move_till_expr (step, fmt_data->orig_loop, fmt_data->loop);
      force_move_till_expr (lbound, fmt_data->orig_loop, fmt_data->loop);
    }

  if (TREE_CODE (*index) != SSA_NAME)
    return true;

  stmt = SSA_NAME_DEF_STMT (*index);
  if (IS_EMPTY_STMT (stmt))
    return true;

  set_level (stmt, fmt_data->orig_loop, fmt_data->loop);

  return true;
}

/* Records the memory reference location *REF in the list MEM_REFS.  The
   reference occurs in statement STMT.  */

static void
record_mem_ref_loc (struct mem_ref_loc **mem_refs, tree stmt, tree *ref)
{
  struct mem_ref_loc *aref = xmalloc (sizeof (struct mem_ref_loc));

  aref->stmt = stmt;
  aref->ref = ref;

  aref->next = *mem_refs;
  *mem_refs = aref;
}

/* Releases the list of memory reference locations MEM_REFS.  */

static void
free_mem_ref_locs (struct mem_ref_loc *mem_refs)
{
  struct mem_ref_loc *act;

  while (mem_refs)
    {
      act = mem_refs;
      mem_refs = mem_refs->next;
      free (act);
    }
}

/* Rewrites the memory references in the list MEM_REFS to use variable
   TMP_VAR.  */

static void
rewrite_mem_refs (tree tmp_var, struct mem_ref_loc *mem_refs)
{
  tree var;
  ssa_op_iter iter;

  for (; mem_refs; mem_refs = mem_refs->next)
    {
      FOR_EACH_SSA_TREE_OPERAND (var, mem_refs->stmt, iter,
				 SSA_OP_ALL_VIRTUALS)
	mark_sym_for_renaming (SSA_NAME_VAR (var));

      *mem_refs->ref = tmp_var;
      update_stmt (mem_refs->stmt);
    }
}

/* Records a request for store motion of the memory reference REF from LOOP.
   MEM_REFS is the list of occurrences of the reference REF inside LOOP;
   these references are rewritten to use a new temporary variable.
   Exits from the LOOP are stored in EXITS; there are N_EXITS of them.
   The initialization of the temporary variable is put in the preheader
   of the loop, and assignments to the reference from the temporary variable
   are emitted on the exits.  */

static void
schedule_sm (struct loop *loop, edge *exits, unsigned n_exits, tree ref,
	     struct mem_ref_loc *mem_refs)
{
  struct mem_ref_loc *aref;
  tree tmp_var;
  unsigned i;
  tree load, store;
  struct fmt_data fmt_data;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Executing store motion of ");
      print_generic_expr (dump_file, ref, 0);
      fprintf (dump_file, " from loop %d\n", loop->num);
    }

  tmp_var = make_rename_temp (TREE_TYPE (ref), "lsm_tmp");

  fmt_data.loop = loop;
  fmt_data.orig_loop = loop;
  for_each_index (&ref, force_move_till, &fmt_data);

  rewrite_mem_refs (tmp_var, mem_refs);
  for (aref = mem_refs; aref; aref = aref->next)
    if (LIM_DATA (aref->stmt))
      LIM_DATA (aref->stmt)->sm_done = true;

  /* Emit the load & stores.  */
  load = build (MODIFY_EXPR, void_type_node, tmp_var, ref);
  get_stmt_ann (load)->aux = xcalloc (1, sizeof (struct lim_aux_data));
  LIM_DATA (load)->max_loop = loop;
  LIM_DATA (load)->tgt_loop = loop;

  /* Put this into the latch, so that we are sure it will be processed after
     all dependencies.  */
  bsi_insert_on_edge (loop_latch_edge (loop), load);

  for (i = 0; i < n_exits; i++)
    {
      store = build (MODIFY_EXPR, void_type_node,
		     unshare_expr (ref), tmp_var);
      bsi_insert_on_edge (exits[i], store);
    }
}
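
/* Schematically, for a single-exit loop, the store motion requested here
   transforms

     while (...)
       {
	 ... = MEM;
	 MEM = ...;
       }

   into

     lsm_tmp = MEM;
     while (...)
       {
	 ... = lsm_tmp;
	 lsm_tmp = ...;
       }
     MEM = lsm_tmp;

   (a sketch; MEM stands for the moved reference).  Note that the load is
   emitted on the latch edge above; it only reaches the preheader later,
   when move_computations hoists it out of the loop.  */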

/* Check whether the memory reference REF can be hoisted out of the LOOP.  If
   it can, prepare the statements that load the value of the memory reference
   to a temporary variable in the loop preheader, store it back on the loop
   exits, and replace all the references inside LOOP by this temporary
   variable.  LOOP has N_EXITS exits stored in EXITS.  CLOBBERED_VOPS is the
   bitmap of virtual operands that are clobbered by a call or accessed through
   multiple references in the loop.  */

static void
determine_lsm_ref (struct loop *loop, edge *exits, unsigned n_exits,
		   bitmap clobbered_vops, struct mem_ref *ref)
{
  struct mem_ref_loc *aref;
  struct loop *must_exec;

  /* In case the memory is not stored to, there is nothing for SM to do.  */
  if (!ref->is_stored)
    return;

  /* If the reference is aliased with any other reference, or clobbered by a
     call in the function, then fail.  */
  if (bitmap_intersect_p (ref->vops, clobbered_vops))
    return;

  if (tree_could_trap_p (ref->mem))
    {
      /* If the memory access is unsafe (i.e. it might trap), ensure that one
	 of the statements in which it occurs is always executed when the
	 loop is entered.  This way we know that by moving the load from the
	 reference out of the loop we will not cause an error that would not
	 occur otherwise.

	 TODO -- in fact we would like to check for anticipability of the
	 reference, i.e. that on each path from loop entry to loop exit at
	 least one of the statements containing the memory reference is
	 executed.  */

      for (aref = ref->locs; aref; aref = aref->next)
	{
	  if (!LIM_DATA (aref->stmt))
	    continue;

	  must_exec = LIM_DATA (aref->stmt)->always_executed_in;
	  if (!must_exec)
	    continue;

	  if (must_exec == loop
	      || flow_loop_nested_p (must_exec, loop))
	    break;
	}

      if (!aref)
	return;
    }

  schedule_sm (loop, exits, n_exits, ref->mem, ref->locs);
}

/* Attempts to hoist the memory reference described in SLOT out of the loop
   described in DATA.  Callback for htab_traverse.  */

struct hmr_data
{
  struct loop *loop;	/* Loop from which the reference should be hoisted.  */
  edge *exits;		/* Exits of the loop.  */
  unsigned n_exits;	/* Length of the exits array.  */
  bitmap clobbered_vops;/* The vops clobbered by call in loop or accessed by
			   multiple memory references.  */
};

static int
hoist_memory_reference (void **slot, void *data)
{
  struct mem_ref *ref = *slot;
  struct hmr_data *hmr_data = data;

  determine_lsm_ref (hmr_data->loop, hmr_data->exits, hmr_data->n_exits,
		     hmr_data->clobbered_vops, ref);

  return 1;
}

/* Checks whether LOOP (with N_EXITS exits stored in the EXITS array) is
   suitable for a store motion optimization (i.e. whether we can insert
   statements on its exits).  */

static bool
loop_suitable_for_sm (struct loop *loop ATTRIBUTE_UNUSED, edge *exits,
		      unsigned n_exits)
{
  unsigned i;

  for (i = 0; i < n_exits; i++)
    if (exits[i]->flags & EDGE_ABNORMAL)
      return false;

  return true;
}

/* A hash function for struct mem_ref object OBJ.  */

static hashval_t
memref_hash (const void *obj)
{
  const struct mem_ref *mem = obj;

  return mem->hash;
}

/* An equality function for struct mem_ref object OBJ1 with
   memory reference OBJ2.  */

static int
memref_eq (const void *obj1, const void *obj2)
{
  const struct mem_ref *mem1 = obj1;

  return operand_equal_p (mem1->mem, (tree) obj2, 0);
}

/* A function to free the struct mem_ref object OBJ.  */

static void
memref_del (void *obj)
{
  struct mem_ref *mem = obj;

  free_mem_ref_locs (mem->locs);
  BITMAP_FREE (mem->vops);
  free (mem);
}

/* Gathers memory references in statement STMT in LOOP, storing the
   information about them in the MEM_REFS hash table.  Note vops accessed
   through unrecognized statements in CLOBBERED_VOPS.  */

static void
gather_mem_refs_stmt (struct loop *loop, htab_t mem_refs,
		      bitmap clobbered_vops, tree stmt)
{
  tree *lhs, *rhs, *mem = NULL;
  hashval_t hash;
  PTR *slot;
  struct mem_ref *ref = NULL;
  ssa_op_iter oi;
  tree vname;
  bool is_stored;

  if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
    return;

  /* Recognize MEM = (SSA_NAME | invariant) and SSA_NAME = MEM patterns.  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    goto fail;

  lhs = &TREE_OPERAND (stmt, 0);
  rhs = &TREE_OPERAND (stmt, 1);

  if (TREE_CODE (*lhs) == SSA_NAME)
    {
      if (!is_gimple_addressable (*rhs))
	goto fail;

      mem = rhs;
      is_stored = false;
    }
  else if (TREE_CODE (*rhs) == SSA_NAME
	   || is_gimple_min_invariant (*rhs))
    {
      mem = lhs;
      is_stored = true;
    }
  else
    goto fail;

  /* If we cannot create an SSA name for the result, give up.  */
  if (!is_gimple_reg_type (TREE_TYPE (*mem))
      || TREE_THIS_VOLATILE (*mem))
    goto fail;

  /* If we cannot move the reference out of the loop, fail.  */
  if (!for_each_index (mem, may_move_till, loop))
    goto fail;

  hash = iterative_hash_expr (*mem, 0);
  slot = htab_find_slot_with_hash (mem_refs, *mem, hash, INSERT);

  if (*slot)
    ref = *slot;
  else
    {
      ref = xmalloc (sizeof (struct mem_ref));
      ref->mem = *mem;
      ref->hash = hash;
      ref->locs = NULL;
      ref->is_stored = false;
      ref->vops = BITMAP_ALLOC (NULL);
      *slot = ref;
    }
  ref->is_stored |= is_stored;

  FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
			     SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
    {
      bitmap_set_bit (ref->vops,
		      var_ann (SSA_NAME_VAR (vname))->uid);
    }
  record_mem_ref_loc (&ref->locs, stmt, mem);
  return;

fail:
  FOR_EACH_SSA_TREE_OPERAND (vname, stmt, oi,
			     SSA_OP_VIRTUAL_USES | SSA_OP_VIRTUAL_KILLS)
    {
      bitmap_set_bit (clobbered_vops,
		      var_ann (SSA_NAME_VAR (vname))->uid);
    }
}

/* Gathers memory references in LOOP, storing the information about them
   in the MEM_REFS hash table.  Note vops accessed through unrecognized
   statements in CLOBBERED_VOPS.  */

static void
gather_mem_refs (struct loop *loop, htab_t mem_refs, bitmap clobbered_vops)
{
  basic_block *body = get_loop_body (loop);
  block_stmt_iterator bsi;
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    {
      for (bsi = bsi_start (body[i]); !bsi_end_p (bsi); bsi_next (&bsi))
	gather_mem_refs_stmt (loop, mem_refs, clobbered_vops, bsi_stmt (bsi));
    }

  free (body);
}

/* Finds the vops accessed by the memory reference described in SLOT as well
   as by some other reference(s) and marks them in DATA->clobbered_vops.
   Callback for htab_traverse.  */

struct fmrv_data
{
  bitmap clobbered_vops;	/* The vops clobbered by call in loop or accessed
				   by multiple memory references.  */
  bitmap all_vops;		/* All vops referenced in the loop.  */
};

static int
find_more_ref_vops (void **slot, void *data)
{
  struct mem_ref *ref = *slot;
  struct fmrv_data *fmrv_data = data;
  bitmap_head tmp;

  /* The vops that are already in all_vops are accessed by more than
     one memory reference.  */
  bitmap_initialize (&tmp, &bitmap_default_obstack);
  bitmap_and (&tmp, fmrv_data->all_vops, ref->vops);
  bitmap_ior_into (fmrv_data->clobbered_vops, &tmp);
  bitmap_clear (&tmp);

  bitmap_ior_into (fmrv_data->all_vops, ref->vops);
  return 1;
}

/* Try to perform store motion for all memory references modified inside
   LOOP.  */

static void
determine_lsm_loop (struct loop *loop)
{
  unsigned n_exits;
  edge *exits = get_loop_exit_edges (loop, &n_exits);
  htab_t mem_refs;
  struct hmr_data hmr_data;
  struct fmrv_data fmrv_data;
  bitmap clobbered_vops;

  if (!loop_suitable_for_sm (loop, exits, n_exits))
    {
      free (exits);
      return;
    }

  mem_refs = htab_create (100, memref_hash, memref_eq, memref_del);

  /* Find the memory references in LOOP.  */
  clobbered_vops = BITMAP_ALLOC (NULL);
  gather_mem_refs (loop, mem_refs, clobbered_vops);

  /* Find the vops that are used for more than one reference.  */
  fmrv_data.all_vops = BITMAP_ALLOC (NULL);
  fmrv_data.clobbered_vops = clobbered_vops;
  htab_traverse (mem_refs, find_more_ref_vops, &fmrv_data);
  BITMAP_FREE (fmrv_data.all_vops);

  /* Hoist all suitable memory references.  */
  hmr_data.loop = loop;
  hmr_data.exits = exits;
  hmr_data.n_exits = n_exits;
  hmr_data.clobbered_vops = clobbered_vops;
  htab_traverse (mem_refs, hoist_memory_reference, &hmr_data);

  htab_delete (mem_refs);
  free (exits);
  BITMAP_FREE (clobbered_vops);
}

/* Try to perform store motion for all memory references modified inside
   any of the LOOPS.  */

static void
determine_lsm (struct loops *loops)
{
  struct loop *loop;

  if (!loops->tree_root->inner)
    return;

  /* Traverse the loops outermost-first and perform store motion where
     suitable.  */

  loop = loops->tree_root->inner;
  while (1)
    {
      determine_lsm_loop (loop);

      if (loop->inner)
	{
	  loop = loop->inner;
	  continue;
	}
      while (!loop->next)
	{
	  loop = loop->outer;
	  if (loop == loops->tree_root)
	    {
	      loop_commit_inserts ();
	      return;
	    }
	}
      loop = loop->next;
    }
}
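
/* The walk above is a preorder traversal of the loop tree: each loop is
   processed before the loops nested in it, and siblings are processed in
   order.  For instance, if loop A contains B, B contains C, and A has a
   sibling D, the processing order is A, B, C, D.  The names are
   illustrative only.  */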

/* Fills the ALWAYS_EXECUTED_IN information for basic blocks of LOOP, i.e.
   for each such basic block BB records the outermost loop for which execution
   of its header implies execution of BB.  CONTAINS_CALL is the bitmap of
   blocks that contain a nonpure call.  */

static void
fill_always_executed_in (struct loop *loop, sbitmap contains_call)
{
  basic_block bb = NULL, *bbs, last = NULL;
  unsigned i;
  edge e;
  struct loop *inn_loop = loop;

  if (!loop->header->aux)
    {
      bbs = get_loop_body_in_dom_order (loop);

      for (i = 0; i < loop->num_nodes; i++)
	{
	  edge_iterator ei;
	  bb = bbs[i];

	  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	    last = bb;

	  if (TEST_BIT (contains_call, bb->index))
	    break;

	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (!flow_bb_inside_loop_p (loop, e->dest))
	      break;
	  if (e)
	    break;

	  /* A loop might be infinite (TODO use simple loop analysis
	     to disprove this if possible).  */
	  if (bb->flags & BB_IRREDUCIBLE_LOOP)
	    break;

	  if (!flow_bb_inside_loop_p (inn_loop, bb))
	    break;

	  if (bb->loop_father->header == bb)
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
		break;

	      /* In a loop that is always entered we may proceed anyway.
		 But record that we entered it and stop once we leave it.  */
	      inn_loop = bb->loop_father;
	    }
	}

      while (1)
	{
	  last->aux = loop;
	  if (last == loop->header)
	    break;
	  last = get_immediate_dominator (CDI_DOMINATORS, last);
	}

      free (bbs);
    }

  for (loop = loop->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);
}

/* Compute the global information needed by the loop invariant motion pass.
   LOOPS is the loop tree.  */

static void
tree_ssa_lim_initialize (struct loops *loops)
{
  sbitmap contains_call = sbitmap_alloc (last_basic_block);
  block_stmt_iterator bsi;
  struct loop *loop;
  basic_block bb;

  sbitmap_zero (contains_call);
  FOR_EACH_BB (bb)
    {
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	{
	  if (nonpure_call_p (bsi_stmt (bsi)))
	    break;
	}

      if (!bsi_end_p (bsi))
	SET_BIT (contains_call, bb->index);
    }

  for (loop = loops->tree_root->inner; loop; loop = loop->next)
    fill_always_executed_in (loop, contains_call);

  sbitmap_free (contains_call);
}

/* Cleans up after the invariant motion pass.  */

static void
tree_ssa_lim_finalize (void)
{
  basic_block bb;

  FOR_EACH_BB (bb)
    {
      bb->aux = NULL;
    }
}

/* Moves invariants from LOOPS.  Only "expensive" invariants are moved out --
   i.e. those that are likely to be a win regardless of the register
   pressure.  */

void
tree_ssa_lim (struct loops *loops)
{
  tree_ssa_lim_initialize (loops);

  /* For each statement determine the outermost loop in which it is
     invariant and the cost of computing the invariant.  */
  determine_invariantness ();

  /* For each memory reference determine whether it is possible to hoist it
     out of the loop.  Force the necessary invariants to be moved out of the
     loops as well.  */
  determine_lsm (loops);

  /* Move the expressions that are expensive enough.  */
  move_computations ();

  tree_ssa_lim_finalize ();
}